import requests
import re
from lxml import etree
import os

# Scrape the pearvideo.com category listing and save each linked video as
# "<namedir>/<page title>.mp4".

# Output directory for the downloaded videos; created if it is missing.
namedir = "day3_homework"
os.makedirs(namedir, exist_ok=True)

# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36'
}

# Category listing page whose video links are followed.
url = "https://www.pearvideo.com/category_4"

# Seconds to wait on each request so a stalled server cannot hang the script.
request_timeout = 30

# Pattern for a direct video address inside a detail page's inline script.
src_url_rule = re.compile(r'srcUrl="(.*?)"')

# Characters that must not appear in a file name (path separators, characters
# reserved on Windows, and control whitespace).
unsafe_name_chars = re.compile(r'[\\/:*?"<>|\r\n\t]')

# NOTE(review): the original author observed that the srcUrl pattern found
# nothing in the inline script ("the list is empty") and hard-coded this one
# address instead. Their notes record two forms of the address:
#   https://video.pearvideo.com/mp4/short/20201010/cont-1701013-15421940-hd.mp4
#   https://video.pearvideo.com/mp4/short/20201010/1602342214027-15421940-hd.mp4
# It is kept only as a fallback: whenever it is used, the saved file holds
# this video rather than the one named by the title.
fallback_video_url = 'https://video.pearvideo.com/mp4/short/20201010/cont-1701013-15421940-hd.mp4'

# Fetch the listing page; without it there is nothing to do, so fail loudly.
listing = requests.get(url=url, headers=headers, timeout=request_timeout)
listing.raise_for_status()
detail_paths = etree.HTML(listing.text).xpath("//div[@class='vervideo-bd']/a/@href")

for page in detail_paths:
    detail_url = "https://www.pearvideo.com/" + page

    # A single unreachable detail page should not abort the whole run.
    try:
        detail = requests.get(url=detail_url, headers=headers, timeout=request_timeout)
        detail.raise_for_status()
    except requests.RequestException as exc:
        print("skip %s: cannot load detail page (%s)" % (detail_url, exc))
        continue
    detail_tree = etree.HTML(detail.text)

    # The page title becomes the file name; skip pages that do not have one.
    headings = detail_tree.xpath('//div[@class="video-tt-box"]//h1/text()')
    safe_title = unsafe_name_chars.sub("_", headings[0]).strip() if headings else ""
    if not safe_title:
        print("skip %s: no usable title found" % detail_url)
        continue
    title = os.path.join(namedir, safe_title + ".mp4")

    # Prefer the address embedded in the page; fall back to the fixed one
    # when the script block is missing or contains no srcUrl.
    scripts = detail_tree.xpath("//div[@id='detailsbd']/div[1]/script[1]/text()")
    found = src_url_rule.search(scripts[0]) if scripts else None
    if found:
        result = found.group(1)
    else:
        print("warning %s: no srcUrl found, using the fallback address" % detail_url)
        result = fallback_video_url

    # Only write the file when the download succeeded, so an HTTP error body
    # is never stored as an .mp4.
    try:
        video = requests.get(url=result, headers=headers, timeout=request_timeout)
        video.raise_for_status()
    except requests.RequestException as exc:
        print("skip %s: download failed (%s)" % (result, exc))
        continue

    with open(title, "wb") as fp:
        fp.write(video.content)
